# RWR for full_sample
# Clear all (non-hidden) objects from the global environment before starting.
rm(list = ls())
# NOTE: attach order matters below -- packages attached later mask
# same-named functions from packages attached earlier.
library(tidyverse)
library(caret)
library(haven)
library(stringr)
library(survey)
library(splines)
library(rlang)
library(lmtest)
library(sandwich)
library(broom)
# MASS is attached after tidyverse, so a bare select() resolves to
# MASS::select; the code below therefore calls dplyr::select explicitly.
library(MASS)
library(matrixStats)
library(naniar)
library(VIM)
# mice is attached after tidyverse, so a bare complete() resolves to the
# mice version rather than tidyr::complete.
library(mice)
# Fix the RNG seed; the literal 02138 is parsed as the number 2138.
set.seed(02138)

# Set working directories
#please set your own directory! 
#setwd("~/Dropbox (Harvard University)/Gov 2001 Rep Paper/R Scripts") 
#setwd("~/Dropbox (Harvard University)/Gov_2001_Rep/R Scripts") 


# All data
# First input file: flag every row with mediator = 1 and rename the dyad
# identifier columns.
mct <- readRDS("d2.RDS")
mct <- mct %>%
  mutate(mediator = 1) %>%
  rename(
    namea = country_text_a,
    nameb = cabb_b,
    statea = country_code_a,
    stateb = ccode_b
  )

# Second input file (presumably the control group -- confirm): flag every row
# with mediator = 0 and rename contiguity, alliance score and start year.
control_gp_all_vars <- readRDS("control_gp_all_vars.RDS")
control_gp_all_vars <- control_gp_all_vars %>%
  mutate(mediator = 0) %>%
  rename(
    contig_ks = conttype,
    alliance_gg = score,
    year = strtyr
  )

# filter(demonstration==1)

# Factor variable using polity 2 (polity 4)
# Stack the two sources, then derive the factor versions of the democracy
# indicators and their product (joint-democracy factor).
full_sample_case_control <- bind_rows(mct, control_gp_all_vars)
full_sample_case_control <- mutate(
  full_sample_case_control,
  demdem_polity = as.factor(demo_a * demo_b),
  polity_a = factor(demo_a),
  polity_b = factor(demo_b)
)

### Old code using polity 3!
# full_sample_case_control <- full_sample %>%mutate(full_sample, polity_a = if_else(democ_a >5, 1, 0)) %>%
#   mutate(full_sample, polity_b = if_else(democ_b >5, 1, 0)) %>%
#   mutate(full_sample, demdem_polity = polity_a*polity_b) %>%
#   mutate(polity_a = factor(polity_a), polity_b = factor(polity_b), demdem_polity = factor(demdem_polity)) 

# Persist the combined sample so later sections can reload it from disk.
saveRDS(full_sample_case_control, file = "full_sample_case_control.RDS")

# Distribution of `demonstration` among the mediator == 1 rows.
with(full_sample_case_control, summary(demonstration[mediator == 1]))


###############################

#####################################
### Sample  MI or dropping vars of interest
#####################################

## (1) DROPPING

# Reset the workspace and reload the combined sample written to disk earlier
# in this script.
rm(list = ls())
full_sample_case_control <- readRDS("full_sample_case_control.RDS")

# Each variable set is kept in three forms: a character vector, a list of
# quosures (for splicing with !!!), and -- for some sets -- a single
# `a + b + ...` expression usable as a formula right-hand side.

# Set "post_t_b" (women in lower house dropped as too many NAs)
post_t_b_no_quotes <- vars(recruit)
post_t_b <- c("recruit")
post_t_b_rhs <- reduce(
  map(post_t_b_no_quotes, get_expr),
  function(lhs, rhs) expr(!!lhs + !!rhs)
)

# Set "post_t_c"
post_t_c_no_quotes <- vars(female_suffrage, milex_quintile, reg_power_rank, e_peaveduc)
post_t_c <- c("female_suffrage", "milex_quintile", "reg_power_rank", "e_peaveduc")

# post_t_c_fewer <- c("milex_quintile", "reg_power_rank")
# post_t_c_fewer_no_quotes <- vars(milex_quintile, reg_power_rank)
# post_t_c_fewer_rhs <- (map(post_t_c_fewer_no_quotes, get_expr) %>% reduce(~ expr(!!.x + !!.y)))

# Set "conf" (presumably the confounders -- confirm)
conf_noquote <- vars(major_a, alliance_gg, cinc_a, cinc_b, contig_ks)
conf <- c("major_a", "alliance_gg", "cinc_a", "cinc_b", "contig_ks")
conf_rhs <- reduce(
  map(conf_noquote, get_expr),
  function(lhs, rhs) expr(!!lhs + !!rhs)
)

# Keep only the columns used downstream: the three variable sets, the polity
# factors, the mediator flag, the outcome, and the raw democ scores.
full_sample_3 <- full_sample_case_control %>%
  dplyr::select(
    !!!post_t_b_no_quotes,
    !!!post_t_c_no_quotes,
    !!!conf_noquote,
    polity_a, polity_b, demdem_polity,
    mediator, compliance,
    democ_a, democ_b # keeping democ variables as need for dml
  )

# Per-variable missingness table; n = 100 so every row is printed.
full_sample_3 %>%
  miss_var_summary() %>%
  print(n = 100)

# Complete-case version: drop rows with an NA in any of the variable sets or
# the polity factors (imputation is the alternative considered below).
# mediator, compliance and the democ scores are not part of the NA filter.
full_sample_case_control_dropped <- drop_na(
  full_sample_3,
  !!!post_t_b_no_quotes,
  !!!post_t_c_no_quotes,
  !!!conf_noquote,
  polity_a, polity_b, demdem_polity
)

# 402 rows -- CHECK! (or 711 if conf_noquote is not included in the filter)
nrow(full_sample_case_control_dropped)

# Write the complete-case sample, then snapshot the whole workspace
# (data plus the variable-set objects).
saveRDS(full_sample_case_control_dropped, "full_sample_case_control_dropped.RDS")

save.image("03b_prep.RData")




###### (2) MI
# Explore missings
full_sample_2 <- full_sample_3

# Missingness plot for every column except `compliance`.
# `labels` must be the names of the plotted columns; the previous
# `names(data)` referred to the base `data` function and evaluated to NULL.
aggr_plot <- aggr(
  dplyr::select(full_sample_2, -compliance),
  col = c('navyblue', 'red'),
  numbers = TRUE,
  sortVars = TRUE,
  labels = setdiff(names(full_sample_2), "compliance"),
  cex.axis = .7,
  gap = 3,
  ylab = c("Histogram of missing data", "Pattern")
)
miss_var_summary(full_sample_2) %>% print(n = 100)

# Number of imputed data sets.
I <- 10

# Impute with random forests. demdem_polity, compliance and mediator are left
# out of the imputation model and re-attached afterwards.
# The polity factors are recoded to 0/1 doubles: 1 when the factor sits at its
# second level (assumes the levels are "0" < "1" -- confirm).
# `defaultMethod` is documented as a length-4 vector (numeric, 2-level factor,
# unordered factor, ordered factor), so "rf" is supplied for all four slots;
# for numeric columns this is identical to the previous single "rf".
# NOTE(review): the RNG seed is only set at the top of the script, so running
# this section on its own gives non-reproducible imputations.
full_sample_mi <- full_sample_2 %>%
  dplyr::select(-c(demdem_polity, compliance, mediator)) %>%
  mutate(
    polity_a = as.double(as.integer(polity_a) == 2),
    polity_b = as.double(as.integer(polity_b) == 2)
  ) %>%
  mice(m = I, maxit = 10, defaultMethod = rep("rf", 4))

# Extract each completed data set (mice::complete is named explicitly because
# tidyr also exports complete()), then re-attach the untouched mediator and
# compliance columns by row position and rebuild demdem_polity.
# Note demdem_polity is numeric here, whereas it is a factor in the
# complete-case sample.
imputed_full <- map(seq_len(I), function(i) {
  mice::complete(full_sample_mi, i) %>%
    mutate(
      mediator = full_sample_2$mediator,
      compliance = full_sample_2$compliance,
      demdem_polity = polity_a * polity_b
    )
})

nrow(imputed_full[[1]]) #1504 observations

saveRDS(imputed_full, file = "full_sample_case_control_imputed.RDS")

## VECTORS OF VARS USED IN ANALYSES

### Currently dropped vars (though included below):
# women in lower house (lots of NAs)
# tau (function of alliance scores)
# capshare (function of cinc)
# demdem - if both countries are democracies; majmin - major vs. minor powers
# contiguity



